---
title: "Curving the Resource Curse Analysis"
author: "Jonathan Pinckney"
date: "4/8/2020"
output:
  html_document:
    df_print: paged
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, 
                      warning = FALSE,
                      results = "asis")

# Load Necessary R Packages

library(tidyverse)
library(multiwayvcov)
library(stargazer)
library(margins)
library(readxl)
library(lme4)

# Load Primary Dataset

datasets <- read_rds("oil_gas_data.rds")

working.data <- datasets[[1]]

# Load NAVCO 2.1 to Check Number of Campaigns

navco <- datasets[[2]]

```

## Get Number of Campaigns in NAVCO 2.1 (referred to in main text)

```{r NAVCO}
navco %>%
  group_by(id) %>% 
  summarize(nv = max(prim_meth)) %>%
  ungroup() %>% 
  group_by(nv) %>% 
  summarize(num = n())
```

## Summary Statistics Table (Table 1 in Main Text)

```{r sumstats}
working.data %>%
  ungroup() %>%
  as.data.frame() %>% 
  select(oil_gas_valuePOP_2000_log,nv_onset,log_pop,log_gdppc,region.contag,election,nv_camp,
         leader.years_log,log.ex.group.num,phys.int,iccpr) %>% 
  stargazer(title = "Summary Statistics",
            covariate.labels = c("Oil Revenue (log)", "NV Campaign Onset", "Population (log)",
                                 "GDP per Capita (log)", "Regional Contagion",  "Election Year",  
                                 "Ongoing NV Campaign","Leader Years (log)","Excluded EPR Groups (log)",
                                 "Physical Integrity Rights","ICCPR Signatory"),
            omit.summary.stat = c("p25","p75"),
            label = "tab: sumstats",
            header = F,
            type = "html"
            )
```

## Cross-Tabulation of NV Onset and Mean/Median Oil and Gas Revenue (Table 2 in Main Text)

```{r}
working.data %>% 
  group_by(lead_nv_onset) %>% 
  filter(!(is.na(lead_nv_onset))) %>% 
  summarize(mean.oil = mean(oil_gas_valuePOP_2000,na.rm = T),
            median.oil = median(oil_gas_valuePOP_2000,na.rm = T)) %>%
  mutate(lead_nv_onset = if_else(lead_nv_onset == 0,"No","Yes")) %>% 
  rename(`NV Onset` = lead_nv_onset,`Mean Revenue` = mean.oil, `Median Revenue` = median.oil) %>% 
  as.data.frame() %>% 
  stargazer(title = "NV Onset and Oil/Gas Revenue Cross-Tabulations",
            label = "tab: crosstab",
            summary = F,
            rownames = F,
            header = F,
            type = "html"
              )
```

## Main Text Models and Regression Table (Table 3)

```{r main mods, warning = FALSE}
# Main Models

model1 <- glm(lead_nv_onset ~ oil_gas_valuePOP_2000_log
              , family = binomial(link = 'logit'),data = working.data)
model1.ses <- sqrt(diag(cluster.vcov(model1, working.data$cowcode))) # Calculate Robust Clustered Standard Errors


model2 <- glm(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              , family = binomial(link = 'logit'),data = working.data)
model2.ses <- sqrt(diag(cluster.vcov(model2, working.data$cowcode))) # Calculate Robust Clustered Standard Errors


model3 <- glm(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr
              , family = binomial(link = 'logit'),data = working.data)
model3.ses <- sqrt(diag(cluster.vcov(model3, working.data$cowcode))) # Calculate Robust Clustered Standard Errors

model4 <- glm(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr
              + time.polys.1 + time.polys.2 + time.polys.3
              , family = binomial(link = 'logit'),data = working.data)
model4.ses <- sqrt(diag(cluster.vcov(model4, working.data$cowcode))) # Calculate Robust Clustered Standard Errors

model5 <- glmer(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr
              + time.polys.1 + time.polys.2 + time.polys.3
              + (1 | cowcode)
              , family = binomial(link = 'logit'),data = working.data)


stargazer(model1,model2,model3,model4,model5,
          se = list(model1.ses,model2.ses,model3.ses,model4.ses,summary(model5)$coefficients[,2]),
          dep.var.labels = "NV Campaign Onset",
          covariate.labels = c("Oil/Gas Revenue (log)","Oil/Gas Revenue (log) sq",
                               "Population (log)","GDP per capita (log)","Regional Contagion",
                               "Election Year","Ongoing Campaign","Leader Years in Power (log)",
                               "EPR Excluded Groups (log)","Physical Integrity Rights","ICCPR Signatory"),
          multicolumn = T,
          omit = "time",
          star.cutoffs = c(0.05,0.01,0.001),
          add.lines = list(c("Time Polynomials?","No","No","No","Yes","Yes")),
          omit.stat = "bic",
          label = "tab: main results",
          title = "Regression Analysis of Oil and Gas and Nonviolent Resistance Onset",
          header = F,
          type = "html"
          )

```

## Predicted Probability Figure (Figure 1 in Main Text)

```{r, results="hide"}
predprob <-cplot(model4, "oil_gas_valuePOP_2000_log",
                what = "prediction",
                rug = T,
                xlab = "Oil and Gas Revenue (logged)",
                ylab = "NV Onset Probability",
                draw = F
                )
```

```{r}
predprob_plot <- ggplot(data = predprob) + 
                 geom_ribbon(aes(x = xvals, ymin = lower,ymax = upper),fill = "grey") +
                 geom_line(aes(xvals,yvals)) + 
                 geom_rug(data = working.data,aes(x = oil_gas_valuePOP_2000_log)) +
                 labs(x = "Oil and Gas Revenue (log)",
                      y = "NV Onset Probability") +
  theme_bw()
predprob_plot
```



## Main Models LPM Replication (Appendix Table 1)

```{r}
model1.lm <- lm(lead_nv_onset ~ oil_gas_valuePOP_2000_log
             ,data = working.data)
model1.lm.ses <- sqrt(diag(cluster.vcov(model1.lm, working.data$cowcode)))


model2.lm <- lm(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              ,data = working.data)
model2.lm.ses <- sqrt(diag(cluster.vcov(model2.lm, working.data$cowcode)))


model3.lm <- lm(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr
              ,data = working.data)
model3.lm.ses <- sqrt(diag(cluster.vcov(model3.lm, working.data$cowcode)))

model4.lm <- lm(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr
              + time.polys.1 + time.polys.2 + time.polys.3
              ,data = working.data)
model4.lm.ses <- sqrt(diag(cluster.vcov(model4.lm, working.data$cowcode)))

model5.lm <- lmer(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
                     + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr
                     + time.polys.1 + time.polys.2 + time.polys.3
                     + (1 | cowcode)
                     ,data = working.data)


stargazer(model1.lm,model2.lm,model3.lm,model4.lm,model5.lm,
          se = list(model1.lm.ses,model2.lm.ses,model3.lm.ses,model4.lm.ses,summary(model5.lm)$coefficients[,2]),
          dep.var.labels = "NV Campaign Onset",
          covariate.labels = c("Oil/Gas Revenue (log)","Oil/Gas Revenue (log) sq",
                               "Population (log)","GDP per capita (log)","Regional Contagion",
                               "Election Year","Ongoing Campaign","Leader Years in Power (log)",
                               "EPR Excluded Groups (log)","Physical Integrity Rights","ICCPR Signatory"),
          multicolumn = T,
          omit = "time",
          star.cutoffs = c(0.05,0.01,0.001),
          add.lines = list(c("Time Polynomials?","No","No","No","Yes","Yes")),
          omit.stat = "bic",
          label = "tab: lm robust",
          title = "Linear Probability Models of Oil and Gas and Nonviolent Resistance Onset",
          header = F,
          type = "html"
)

```

## Main Models Replication Limited to Ross era (1980 and later) (Appendix Table 2)


```{r, warning=FALSE,}
ross.period <- filter(working.data,year > 1979)


model1.ross <- glm(lead_nv_onset ~ oil_gas_valuePOP_2000_log
              , family = binomial(link = 'logit'),data = ross.period)
model1.ross.ses <- sqrt(diag(cluster.vcov(model1.ross, ross.period$cowcode)))


model2.ross <- glm(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              , family = binomial(link = 'logit'),data = ross.period)
model2.ross.ses <- sqrt(diag(cluster.vcov(model2.ross, ross.period$cowcode)))


model3.ross <- glm(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr
              , family = binomial(link = 'logit'),data = ross.period)
model3.ross.ses <- sqrt(diag(cluster.vcov(model3.ross, ross.period$cowcode)))

model4.ross <- glm(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr
              + time.polys.1 + time.polys.2 + time.polys.3
              , family = binomial(link = 'logit'),data = ross.period)
model4.ross.ses <- sqrt(diag(cluster.vcov(model4.ross, ross.period$cowcode)))

model5.ross <- glmer(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
                     + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr
                     + time.polys.1 + time.polys.2 + time.polys.3
                     + (1 | cowcode)
                     , family = binomial(link = 'logit'),data = ross.period)


stargazer(model1.ross,model2.ross,model3.ross,model4.ross,model5.ross,
          se = list(model1.ross.ses,model2.ross.ses,model3.ross.ses,model4.ross.ses,summary(model5.ross)$coefficients[,2]),
          dep.var.labels = "NV Campaign Onset",
          covariate.labels = c("Oil/Gas Revenue (log)","Oil/Gas Revenue (log) sq",
                               "Population (log)","GDP per capita (log)","Regional Contagion",
                               "Election Year","Ongoing Campaign","Leader Years in Power (log)",
                               "EPR Excluded Groups (log)","Physical Integrity Rights","ICCPR Signatory"),
          multicolumn = T,
          omit = "time",
          star.cutoffs = c(0.05,0.01,0.001),
          add.lines = list(c("Time Polynomials?","No","No","No","Yes","Yes")),
          omit.stat = "bic",
          label = "tab: ross robust",
          title = "Regression Analysis of Oil and Gas and Nonviolent Resistance Onset (Post 1970s)",
          header = F,
          type = "html"
)

```

## Models With Past Failed Mobilization Control (Appendix Table 3)


```{r, warning=FALSE, results="asis"}

model1.ctrl <- glm(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr + nv.total.fails
              , family = binomial(link = 'logit'),data = working.data)
model1.ctrl.ses <- sqrt(diag(cluster.vcov(model1.ctrl, working.data$cowcode))) # Calculate Robust Clustered Standard Errors

model2.ctrl <- glm(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr  + nv.total.fails
              + time.polys.1 + time.polys.2 + time.polys.3
              , family = binomial(link = 'logit'),data = working.data)
model2.ctrl.ses <- sqrt(diag(cluster.vcov(model2.ctrl, working.data$cowcode))) # Calculate Robust Clustered Standard Errors

model3.ctrl <- glmer(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr  + nv.total.fails
              + time.polys.1 + time.polys.2 + time.polys.3
              + (1 | cowcode)
              , family = binomial(link = 'logit'),data = working.data)


stargazer(model1.ctrl,model2.ctrl,model3.ctrl,
          se = list(model1.ctrl.ses,model2.ctrl.ses,summary(model3.ctrl)$coefficients[,2]),
          dep.var.labels = "NV Campaign Onset",
          covariate.labels = c("Oil/Gas Revenue (log)","Oil/Gas Revenue (log) sq",
                               "Population (log)","GDP per capita (log)","Regional Contagion",
                               "Election Year","Ongoing Campaign","Leader Years in Power (log)",
                               "EPR Excluded Groups (log)","Physical Integrity Rights","ICCPR Signatory","Failed Campaigns"),
          multicolumn = T,
          omit = "time",
          star.cutoffs = c(0.05,0.01,0.001),
          add.lines = list(c("Time Polynomials?","No","Yes","Yes")),
          omit.stat = "bic",
          label = "tab: failed campaigns",
          title = "Replication with Failed Campaign Control",
          header = F,
          type = "html"
          )

```



## Testing Linear Effect With Geographic and Manufacturing Controls (Appendix Table 4)

```{r, warning=FALSE, results="asis"}

# Linear Models

model1.lin <- glm(lead_nv_onset ~ oil_gas_valuePOP_2000_log
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr + manufacturing + as.factor(region)
              , family = binomial(link = 'logit'),data = working.data)
model1.lin.ses <- sqrt(diag(cluster.vcov(model1.lin, working.data$cowcode))) # Calculate Robust Clustered Standard Errors

model2.lin <- glm(lead_nv_onset ~ oil_gas_valuePOP_2000_log
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr + manufacturing + as.factor(region)
              + time.polys.1 + time.polys.2 + time.polys.3
              , family = binomial(link = 'logit'),data = working.data)
model2.lin.ses <- sqrt(diag(cluster.vcov(model2.lin, working.data$cowcode))) # Calculate Robust Clustered Standard Errors


model3.lin <- glmer(lead_nv_onset ~ oil_gas_valuePOP_2000_log
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr + manufacturing + as.factor(region)
              + time.polys.1 + time.polys.2 + time.polys.3
              + (1 | cowcode)
              , family = binomial(link = 'logit'),data = working.data)



# Curvilinear Models


model1.curv <- glm(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr + manufacturing + as.factor(region)
              , family = binomial(link = 'logit'),data = working.data)
model1.curv.ses <- sqrt(diag(cluster.vcov(model1.curv, working.data$cowcode))) # Calculate Robust Clustered Standard Errors

model2.curv <- glm(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr + manufacturing + as.factor(region)
              + time.polys.1 + time.polys.2 + time.polys.3
              , family = binomial(link = 'logit'),data = working.data)
model2.curv.ses <- sqrt(diag(cluster.vcov(model2.curv, working.data$cowcode))) # Calculate Robust Clustered Standard Errors


model3.curv <- glmer(lead_nv_onset ~ oil_gas_valuePOP_2000_log + I(oil_gas_valuePOP_2000_log^2)
              + log_pop + log_gdppc + region.contag + election + nv_camp + leader.years_log + log.ex.group.num + phys.int + iccpr + manufacturing + as.factor(region)
              + time.polys.1 + time.polys.2 + time.polys.3
              + (1 | cowcode)
              , family = binomial(link = 'logit'),data = working.data)


stargazer(model1.lin,model1.curv,model2.lin,model2.curv,model3.lin,model3.curv,
          se = list(model1.lin.ses,model1.curv.ses,model2.lin.ses,model2.curv.ses,summary(model3.lin)$coefficients[,2],summary(model3.curv)$coefficients[,2]),
          dep.var.labels = "NV Campaign Onset",
          covariate.labels = c("Oil/Gas Revenue (log)","Oil/Gas Revenue (log) sq",
                               "Population (log)","GDP per capita (log)","Regional Contagion",
                               "Election Year","Ongoing Campaign","Leader Years in Power (log)",
                               "EPR Excluded Groups (log)","Physical Integrity Rights",
                               "ICCPR Signatory","Manufacturing (% GDP)"),
          multicolumn = T,
          omit = c("time","as.factor"),
          star.cutoffs = c(0.05,0.01,0.001),
          add.lines = list(c("Time Polynomials?","No","No","Yes","Yes","Yes","Yes"),c("Region FE?",rep("Yes",6))),
          omit.stat = "bic",
          label = "tab: extra controls",
          title = "Replication with Manufacturing and Regional Fixed Effects Controls",
          header = F,
          type = "html"
          )



```


## Complete List of Country-Years Included in the Data (Appendix Tables 5-9)

```{r}

working.data %>% 
  group_by(cowcode) %>% 
  arrange(year) %>% 
  mutate(`Country Name` = countrycode::countrycode(cowcode,"cown","country.name"),
         new.spell = if_else(row_number(cowcode) == 1,T,year != lag(year) + 1)) %>% 
  ungroup() %>% 
  arrange(cowcode,year) %>% 
  mutate(spell.num = cumsum(new.spell)) %>% 
  group_by(spell.num) %>% 
  summarize(firstyear = min(year),
            lastyear = max(year),
            `Country Name` = unique(`Country Name`)) %>% 
  mutate(spells = if_else(firstyear == lastyear, 
                          as.character(firstyear),
                          paste(as.character(firstyear),as.character(lastyear),sep = "-"))) %>% 
  ungroup() %>% 
  group_by(`Country Name`) %>% 
  summarize(spells = paste(unique(spells),collapse = ",")) %>% 
  stargazer(summary = F,
            rownames = F,
            header = F,
            title = "Full List of Country-Years",
            label = "tab: caselist",
            type = "html")


```

